{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "d89df9bf",
   "metadata": {},
   "source": [
    "# Microbenchmarks on CPU\n",
    "This is a notebook for microbenchmarks running on CPU."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "d08c8bae",
   "metadata": {},
   "outputs": [],
   "source": [
    "from pyspark.sql import SparkSession\n",
    "from pyspark.conf import SparkConf\n",
    "from time import time\n",
    "import os\n",
    "\n",
    "# Change to your cluster ip:port\n",
    "SPARK_MASTER_URL = os.getenv(\"SPARK_MASTER_URL\", \"spark://your-ip:port\")\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "6842522a",
   "metadata": {},
   "source": [
    "Run the microbenchmark with retry times"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "45f50252",
   "metadata": {},
   "outputs": [],
   "source": [
    "def runMicroBenchmark(spark, appName, query, retryTimes):\n",
    "    count = 0\n",
    "    total_time = 0\n",
    "    # You can print the physical plan of each query\n",
    "    # spark.sql(query).explain()\n",
    "    while count < retryTimes:\n",
    "        start = time()\n",
    "        spark.sql(query).show(5)\n",
    "        end = time()\n",
    "        total_time += round(end - start, 2)\n",
    "        count = count + 1\n",
    "        print(\"Retry times : {}, \".format(count) + appName + \" Microbenchmark takes {} seconds\".format(round(end - start, 2)))\n",
    "    print(appName + \" Microbenchmark takes average {} seconds after {} retries\".format(round(total_time/retryTimes),retryTimes))\n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "682c67b1",
   "metadata": {},
   "outputs": [],
   "source": [
    "# You need to update data path with your real path and hardware resource!\n",
    "dataRoot = os.getenv(\"DATA_ROOT\", \"/data\")\n",
    "driverMem = os.getenv(\"DRIVER_MEM\", \"50g\")\n",
    "executorMem = os.getenv(\"EXECUTOR_MEM\", \"12g\")\n",
    "maxPartionBytes = os.getenv(\"MAX_PARTITION_BYTES\", \"1g\")\n",
    "executorCores = int(os.getenv(\"EXECUTOR_CORES\", \"4\"))\n",
    "# common spark settings\n",
    "conf = SparkConf()\n",
    "conf.setMaster(SPARK_MASTER_URL)\n",
    "conf.setAppName(\"Microbenchmark on CPU\")\n",
    "conf.set(\"spark.driver.memory\", driverMem)\n",
    "conf.set(\"spark.executor.memory\", executorMem)\n",
    "conf.set(\"spark.executor.cores\", executorCores)\n",
    " \n",
    "conf.set(\"spark.locality.wait\", \"0\")\n",
    "conf.set(\"spark.sql.files.maxPartitionBytes\", maxPartionBytes) \n",
    "conf.set(\"spark.dynamicAllocation.enabled\", \"false\") \n",
    "conf.set(\"spark.sql.adaptive.enabled\", \"true\")  \n",
    "\n",
    "# create spark session\n",
    "spark = SparkSession.builder.config(conf=conf).getOrCreate()\n",
    "# Load dataframe and create tempView\n",
    "spark.read.parquet(dataRoot + \"/tpcds/store_sales\").createOrReplaceTempView(\"store_sales\")\n",
    "spark.read.parquet(dataRoot + \"/tpcds/catalog_sales\").createOrReplaceTempView(\"catalog_sales\")\n",
    "spark.read.parquet(dataRoot + \"/tpcds/web_sales\").createOrReplaceTempView(\"web_sales\")\n",
    "spark.read.parquet(dataRoot + \"/tpcds/item\").createOrReplaceTempView(\"item\")\n",
    "spark.read.parquet(dataRoot + \"/tpcds/date_dim\").createOrReplaceTempView(\"date_dim\")\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "89512b77",
   "metadata": {},
   "source": [
    "### Expand&HashAggregate\n",
    "This is a microbenchmark about Expand&HashAggregate expressions running on the CPU. The query calculates the distinct value of some dimension columns and average birth year by different c_salutation of customers after grouping by c_current_hdemo_sk."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "3272ef56",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "--------------------------------------------------\n"
     ]
    }
   ],
   "source": [
    "# As a part of this query the size of the data in each task grows a lot. \n",
    "# By default, Spark will try to distribute the data among all the tasks in the cluster, \n",
    "# but on large clusters with large parquet files the splittable portions of the parquet files end up not being distributed evenly \n",
    "# and it is faster to re-partition the data to redistribute it than to deal with skew.\n",
    "spark.read.parquet(dataRoot + \"/tpcds/customer\").repartition(512).createOrReplaceTempView(\"customer\")\n",
    "\n",
    "print(\"-\"*50)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "dd12d749",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "--------------------------------------------------\n"
     ]
    }
   ],
   "source": [
    "query = '''\n",
    "select c_current_hdemo_sk,\n",
    "count(DISTINCT if(c_salutation==\"Ms.\",c_salutation,null)) as c1,\n",
    "count(DISTINCT if(c_salutation==\"Mr.\",c_salutation,null)) as c12,\n",
    "count(DISTINCT if(c_salutation==\"Dr.\",c_salutation,null)) as c13,\n",
    "\n",
    "count(DISTINCT if(c_salutation==\"Ms.\",c_first_name,null)) as c2,\n",
    "count(DISTINCT if(c_salutation==\"Mr.\",c_first_name,null)) as c22,\n",
    "count(DISTINCT if(c_salutation==\"Dr.\",c_first_name,null)) as c23,\n",
    "\n",
    "count(DISTINCT if(c_salutation==\"Ms.\",c_last_name,null)) as c3,\n",
    "count(DISTINCT if(c_salutation==\"Mr.\",c_last_name,null)) as c32,\n",
    "count(DISTINCT if(c_salutation==\"Dr.\",c_last_name,null)) as c33,\n",
    "\n",
    "count(DISTINCT if(c_salutation==\"Ms.\",c_birth_country,null)) as c4,\n",
    "count(DISTINCT if(c_salutation==\"Mr.\",c_birth_country,null)) as c42,\n",
    "count(DISTINCT if(c_salutation==\"Dr.\",c_birth_country,null)) as c43,\n",
    "\n",
    "count(DISTINCT if(c_salutation==\"Ms.\",c_email_address,null)) as c5,\n",
    "count(DISTINCT if(c_salutation==\"Mr.\",c_email_address,null)) as c52,\n",
    "count(DISTINCT if(c_salutation==\"Dr.\",c_email_address,null)) as c53,\n",
    "\n",
    "count(DISTINCT if(c_salutation==\"Ms.\",c_login,null)) as c6,\n",
    "count(DISTINCT if(c_salutation==\"Mr.\",c_login,null)) as c62,\n",
    "count(DISTINCT if(c_salutation==\"Dr.\",c_login,null)) as c63,\n",
    "\n",
    "count(DISTINCT if(c_salutation==\"Ms.\",c_preferred_cust_flag,null)) as c7,\n",
    "count(DISTINCT if(c_salutation==\"Mr.\",c_preferred_cust_flag,null)) as c72,\n",
    "count(DISTINCT if(c_salutation==\"Dr.\",c_preferred_cust_flag,null)) as c73,\n",
    "\n",
    "count(DISTINCT if(c_salutation==\"Ms.\",c_birth_month,null)) as c8,\n",
    "count(DISTINCT if(c_salutation==\"Mr.\",c_birth_month,null)) as c82,\n",
    "count(DISTINCT if(c_salutation==\"Dr.\",c_birth_month,null)) as c83,\n",
    "\n",
    "avg(if(c_salutation==\"Ms.\",c_birth_year,null)) as avg1,\n",
    "avg(if(c_salutation==\"Mr.\",c_birth_year,null)) as avg2,\n",
    "avg(if(c_salutation==\"Dr.\",c_birth_year,null)) as avg3,\n",
    "avg(if(c_salutation==\"Miss.\",c_birth_year,null)) as avg4,\n",
    "avg(if(c_salutation==\"Mrs.\",c_birth_year,null)) as avg5,\n",
    "avg(if(c_salutation==\"Sir.\",c_birth_year,null)) as avg6,\n",
    "avg(if(c_salutation==\"Professor.\",c_birth_year,null)) as avg7,\n",
    "avg(if(c_salutation==\"Teacher.\",c_birth_year,null)) as avg8,\n",
    "avg(if(c_salutation==\"Agent.\",c_birth_year,null)) as avg9,\n",
    "avg(if(c_salutation==\"Director.\",c_birth_year,null)) as avg10\n",
    "from customer group by c_current_hdemo_sk\n",
    "'''\n",
    "print(\"-\"*50)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "2e105bf8",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+------------------+---+---+---+---+---+---+---+---+---+---+---+---+---+---+----+---+---+---+---+---+---+---+---+---+------------------+------------------+------------------+----+------------------+----+----+----+----+-----+\n",
      "|c_current_hdemo_sk| c1|c12|c13| c2|c22|c23| c3|c32|c33| c4|c42|c43| c5|c52| c53| c6|c62|c63| c7|c72|c73| c8|c82|c83|              avg1|              avg2|              avg3|avg4|              avg5|avg6|avg7|avg8|avg9|avg10|\n",
      "+------------------+---+---+---+---+---+---+---+---+---+---+---+---+---+---+----+---+---+---+---+---+---+---+---+---+------------------+------------------+------------------+----+------------------+----+----+----+----+-----+\n",
      "|              5803|  1|  1|  1|285|272|592|358|496|791|185|202|210|458|674|1177|  0|  0|  0|  2|  2|  2| 12| 12| 12|1959.5225806451613|1959.6557863501484|1958.5581196581197|null| 1958.873873873874|null|null|null|null| null|\n",
      "|              1591|  1|  1|  1|283|237|544|374|489|739|193|206|211|476|664|1144|  0|  0|  0|  2|  2|  2| 12| 12| 12|1957.3514644351465|1958.2278860569716|1958.6174672489083|null|1958.4357894736843|null|null|null|null| null|\n",
      "|              3918|  1|  1|  1|300|266|539|392|499|755|190|203|210|507|675|1140|  0|  0|  0|  2|  2|  2| 12| 12| 12|1957.6745562130177|1958.2998522895125|1958.8992994746059|null|1959.4233009708737|null|null|null|null| null|\n",
      "|              1580|  1|  1|  1|296|256|562|392|499|808|190|203|211|499|692|1222|  0|  0|  0|  2|  2|  2| 12| 12| 12|1958.5771543086173|  1957.53591954023|1957.3303278688525|null|1958.3611691022963|null|null|null|null| null|\n",
      "|               148|  1|  1|  1|309|260|562|392|501|772|187|207|211|488|668|1154|  0|  0|  0|  2|  2|  2| 12| 12| 12| 1956.219008264463|1958.9161676646706|1957.8076256499132|null|1958.3412017167382|null|null|null|null| null|\n",
      "+------------------+---+---+---+---+---+---+---+---+---+---+---+---+---+---+----+---+---+---+---+---+---+---+---+---+------------------+------------------+------------------+----+------------------+----+----+----+----+-----+\n",
      "only showing top 5 rows\n",
      "\n",
      "Retry times : 1, Expand&HashAggregate Microbenchmark takes 65.21 seconds\n",
      "Expand&HashAggregate Microbenchmark takes average 65 seconds after 1 retries\n"
     ]
    }
   ],
   "source": [
    "# Run microbenchmark with n retry time\n",
    "runMicroBenchmark(spark,\"Expand&HashAggregate\",query ,1)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "57da403a",
   "metadata": {},
   "source": [
    "### Windowing (without data skew)\n",
    "This is a microbenchmark about windowing expressions running on CPU mode. The sub-query calculates the average ss_sales_price of a fixed window function partition by ss_customer_sk, and the parent query calculates the average price of the sub-query grouping by each customer."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "68169e7f",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "--------------------------------------------------\n"
     ]
    }
   ],
   "source": [
    "query = '''\n",
    "select ss_customer_sk,avg(avg_price) as avg_price\n",
    "from\n",
    "(\n",
    "SELECT ss_customer_sk ,avg(ss_sales_price) OVER (PARTITION BY ss_customer_sk order by ss_sold_date_sk ROWS BETWEEN 50 PRECEDING AND 50 FOLLOWING ) as avg_price\n",
    "FROM store_sales\n",
    "where ss_customer_sk is not null\n",
    ") group by ss_customer_sk order by 2 desc \n",
    "'''\n",
    "print(\"-\"*50)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "f4d1d9ea",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+--------------+------------------+\n",
      "|ss_customer_sk|         avg_price|\n",
      "+--------------+------------------+\n",
      "|      15924921|52.453036568858586|\n",
      "|      24796404|52.406491887877976|\n",
      "|      10174233|52.217149302596276|\n",
      "|      27571451| 52.14256448618126|\n",
      "|      14299506| 52.09827897444722|\n",
      "+--------------+------------------+\n",
      "only showing top 5 rows\n",
      "\n",
      "Retry times : 1, Windowing without skew Microbenchmark takes 176.61 seconds\n",
      "Windowing without skew Microbenchmark takes average 177 seconds after 1 retries\n"
     ]
    }
   ],
   "source": [
    "# Run microbenchmark with n retry time\n",
    "runMicroBenchmark(spark,\"Windowing without skew\",query , 1)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "7df0e850",
   "metadata": {},
   "source": [
    "### Windowing(with data skew)\n",
    "Data skew is caused by many null values in the ss_customer_sk column."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "12ec99fb",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "--------------------------------------------------\n"
     ]
    }
   ],
   "source": [
    "query = '''\n",
    "select ss_customer_sk,avg(avg_price) as avg_price\n",
    "from\n",
    "(\n",
    "SELECT ss_customer_sk ,avg(ss_sales_price) OVER (PARTITION BY ss_customer_sk order by ss_sold_date_sk ROWS BETWEEN 50 PRECEDING AND 50 FOLLOWING ) as avg_price\n",
    "FROM store_sales\n",
    ") group by ss_customer_sk order by 2 desc \n",
    "'''\n",
    "print(\"-\"*50)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "86e12b88",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+--------------+------------------+\n",
      "|ss_customer_sk|         avg_price|\n",
      "+--------------+------------------+\n",
      "|      15924921| 52.44865972015809|\n",
      "|      24796404|52.406491887877976|\n",
      "|      10174233|52.215293069577626|\n",
      "|      27571451| 52.14256448618126|\n",
      "|      14299506| 52.09827897444722|\n",
      "+--------------+------------------+\n",
      "only showing top 5 rows\n",
      "\n",
      "Retry times : 1, Windowing with skew Microbenchmark takes 1666.07 seconds\n",
      "Windowing with skew Microbenchmark takes average 1666 seconds after 1 retries\n"
     ]
    }
   ],
   "source": [
    "# Run microbenchmark with n retry time\n",
    "runMicroBenchmark(spark,\"Windowing with skew\",query ,1)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "2ef292cc",
   "metadata": {},
   "source": [
    "### Intersection\n",
    "This is a microbenchmark about intersection operation running on CPU mode. The query calculates items in the same brand, class, and category that are sold in all three sales channels in two consecutive years."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "30c8eb8e",
   "metadata": {},
   "outputs": [],
   "source": [
    "query = '''\n",
    "select i_item_sk ss_item_sk\n",
    " from item,\n",
    "    (select iss.i_brand_id brand_id, iss.i_class_id class_id, iss.i_category_id category_id\n",
    "     from store_sales, item iss, date_dim d1\n",
    "     where ss_item_sk = iss.i_item_sk\n",
    "                    and ss_sold_date_sk = d1.d_date_sk\n",
    "       and d1.d_year between 1999 AND 1999 + 2\n",
    "   intersect\n",
    "     select ics.i_brand_id, ics.i_class_id, ics.i_category_id\n",
    "     from catalog_sales, item ics, date_dim d2\n",
    "     where cs_item_sk = ics.i_item_sk\n",
    "       and cs_sold_date_sk = d2.d_date_sk\n",
    "       and d2.d_year between 1999 AND 1999 + 2\n",
    "   intersect\n",
    "     select iws.i_brand_id, iws.i_class_id, iws.i_category_id\n",
    "     from web_sales, item iws, date_dim d3\n",
    "     where ws_item_sk = iws.i_item_sk\n",
    "       and ws_sold_date_sk = d3.d_date_sk\n",
    "       and d3.d_year between 1999 AND 1999 + 2) x\n",
    " where i_brand_id = brand_id\n",
    "   and i_class_id = class_id\n",
    "   and i_category_id = category_id\n",
    "'''"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "d4f9f669",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+----------+\n",
      "|ss_item_sk|\n",
      "+----------+\n",
      "|    326835|\n",
      "|    248465|\n",
      "|    174935|\n",
      "|    130715|\n",
      "|     78159|\n",
      "+----------+\n",
      "only showing top 5 rows\n",
      "\n",
      "Retry times : 1, NDS Q14a subquery Microbenchmark takes 62.42 seconds\n",
      "NDS Q14a subquery Microbenchmark takes average 62 seconds after 1 retries\n"
     ]
    }
   ],
   "source": [
    "# Run microbenchmark with n retry time\n",
    "runMicroBenchmark(spark,\"NDS Q14a subquery\",query ,1)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "5b051d6b",
   "metadata": {},
   "source": [
    "### Crossjoin\n",
    "This is a microbenchmark for a 1-million rows crossjoin with itself."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "56af3f00",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "--------------------------------------------------\n"
     ]
    }
   ],
   "source": [
    "# You have to stop the sparksession and create a new one \n",
    "# because in this query we need to create more executors with less cores to get the best performance\n",
    "spark.stop()\n",
    "conf = SparkConf()\n",
    "# Common spark settings\n",
    "conf.setMaster(SPARK_MASTER_URL)\n",
    "conf.setAppName(\"Crossjoin Microbenchmark on CPU\")\n",
    " \n",
    "conf.set(\"spark.driver.memory\", driverMem)\n",
    "conf.set(\"spark.executor.memory\", executorMem)\n",
    "conf.set(\"spark.executor.cores\", executorCores)\n",
    " \n",
    "conf.set(\"spark.locality.wait\", \"0\")\n",
    "conf.set(\"spark.sql.files.maxPartitionBytes\", maxPartionBytes) \n",
    "conf.set(\"spark.dynamicAllocation.enabled\", \"false\") \n",
    "conf.set(\"spark.sql.adaptive.enabled\", \"true\")\n",
    "# We can get a better performance by broadcast one table to change CartesianJoin to BroadCastNestLoopJoin\n",
    "conf.set(\"spark.sql.autoBroadcastJoinThreshold\",1000000000)\n",
    "# Get or create spark session\n",
    "spark = SparkSession.builder.config(conf=conf).getOrCreate()\n",
    "\n",
    "print(\"-\"*50)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "ae9cdc08",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "scanning and writing parquet cost : 18.18 seconds\n",
      "--------------------------------------------------\n"
     ]
    }
   ],
   "source": [
    "# Load dataframe and create tempView\n",
    "start = time() \n",
    "spark.read.parquet(dataRoot + \"/tpcds/customer\").limit(1000000).write.format(\"parquet\").mode(\"overwrite\").save(\"/data/tmp/customer1m\")\n",
    "end = time()\n",
    "print(\"scanning and writing parquet cost : {} seconds\".format(round(end - start, 2)))\n",
    "# We need to tune the partition number to get the best performance.\n",
    "spark.read.parquet(\"/data/tmp/customer1m\").repartition(16000).createOrReplaceTempView(\"costomer_df_1_million\")\n",
    "query = '''\n",
    "select count(*) from costomer_df_1_million c1 inner join costomer_df_1_million c2 on c1.c_customer_sk>c2.c_customer_sk\n",
    "'''\n",
    "print(\"-\"*50)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "0571d861",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+------------+\n",
      "|    count(1)|\n",
      "+------------+\n",
      "|499999500000|\n",
      "+------------+\n",
      "\n",
      "Retry times : 1, Crossjoin Microbenchmark takes 78.8 seconds\n",
      "Crossjoin Microbenchmark takes average 79 seconds after 1 retries\n"
     ]
    }
   ],
   "source": [
    "# Run microbenchmark with n retry time\n",
    "runMicroBenchmark(spark,\"Crossjoin\",query ,1)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "56f915c2-9b9a-4982-8c4e-5b570c17bfeb",
   "metadata": {},
   "source": [
    "### HashJoin\n",
    "This is a microbenchmark for a HashJoin. The query on GPU will be more than 10x times faster than CPU based on the cluster in the readme."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "040603c9-a96f-4017-bcdb-5f93e12996a4",
   "metadata": {},
   "outputs": [],
   "source": [
    "spark.read.parquet(dataRoot + \"/tpcds/store_sales\").createOrReplaceTempView(\"store_sales\")\n",
    "spark.read.parquet(dataRoot + \"/tpcds/store_returns\").createOrReplaceTempView(\"store_returns\")\n",
    "\n",
    "print(\"-\"*50)\n",
    "query = '''\n",
    "select  sum(store_sales.ss_ext_wholesale_cost)\n",
    "from store_sales\n",
    "join store_returns on (ss_item_sk = sr_item_sk) and (ss_addr_sk=sr_addr_sk)\n",
    "'''\n",
    "runMicroBenchmark(spark,\"HashJoin\",query,1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "id": "7c118cc9",
   "metadata": {},
   "outputs": [],
   "source": [
    "spark.stop()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c9e43255",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
